# ------------------------------------------------------------------------------#
# Description: Create Table A.6: Relationship between drainage problems and eligible peers
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(data.table)
library(fixest)
library(modelsummary)
library(stargazer)

options(scipen = 999)

# Load and merge base data -----------------------------------------------------
load("data/adoptions/adoptions.RData")
adopt[, adopted.any    := max(adopt.any, na.rm = TRUE), by = pin]
adopt[, adopted.rg     := max(adopt.rg, na.rm = TRUE), by = pin]
adopt[, adopted.any.rg := max(adopt.any.rg, na.rm = TRUE), by = pin]
adopt[, adopted.cs     := max(adopt.cs, na.rm = TRUE), by = pin]
adopt[, adopted.both   := max(adopt.both, na.rm = TRUE), by = pin]

adopt[, max.elig.cs := max(elig.cs), by = pin]
adopt[, max.elig.rg := max(elig.rg), by = pin]

# Merge in peer eligibility (CS)
load("data/bucket/peer_elig_buckets_cs.RData")
peer.buckets.cs[, max.elig.cs.100 := max(elig.peers.cs.100), by = pin]
peer.buckets.cs[, max.elig.cs.20  := max(elig.peers.cs.20), by = pin]

dat <- merge(
  unique(adopt[max.elig.cs > 0, .(pin, adopted.any, adopted.rg, adopted.cs, adopted.both,
                                  max.elig.cs, max.elig.rg)]),
  unique(peer.buckets.cs[, .(pin, max.elig.cs.100, max.elig.cs.20)]),
  by = "pin"
)
rm(adopt, peer.buckets.cs)

# Merge in peer eligibility (RG)
load("data/bucket/peer_elig_buckets_rg.RData")
peer.buckets.rg[, max.elig.rg.100 := max(elig.peers.rg.100), by = pin]
peer.buckets.rg[, max.elig.rg.20  := max(elig.peers.rg.20), by = pin]

dat <- merge(dat, unique(peer.buckets.rg[, .(pin, max.elig.rg.100, max.elig.rg.20)]), by = "pin")
rm(peer.buckets.rg)

# Create combined eligibility measures
dat[, max.elig.avg.100 := (max.elig.cs.100 + max.elig.rg.100) / 2]
dat[, max.elig.avg.20  := (max.elig.cs.20  + max.elig.rg.20)  / 2]

# Merge in parcel data
load("data/parcel_build/parcel_build.RData")
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve, ren.10)],
             by = "pin")
dat[, assess.value := val.land + val.improve]
rm(parcel.build)

# Load and merge drainage complaints
load("data/complaints/all_parcels_drain_complaints.RData")
drain <- merge(dat, parcels.drain, all.x = TRUE)

# Load and merge DEM (elevation)
load("data/dem/all_parcels_dem.RData")
drain <- merge(drain, parcels.dem, all.x = TRUE)

# Load and merge drainage areas
load("data/cdc/cdc.RData")
cdc.near[is.na(near_dist_cdc), near_dist_cdc := 3000]
cdc.near[, cdc.far := fifelse(near_dist_cdc == 3000, 0, 1)]

# Bin distances to nearest CDC facility
cdc.near[near_dist_cdc >= 3000,                        cdc.dist.bin := 6]
cdc.near[near_dist_cdc < 3000 & near_dist_cdc >= 0.4*5280, cdc.dist.bin := 5]
cdc.near[near_dist_cdc < 0.4*5280 & near_dist_cdc >= 0.3*5280, cdc.dist.bin := 4]
cdc.near[near_dist_cdc < 0.3*5280 & near_dist_cdc >= 0.2*5280, cdc.dist.bin := 3]
cdc.near[near_dist_cdc < 0.2*5280 & near_dist_cdc >= 0.1*5280, cdc.dist.bin := 2]
cdc.near[near_dist_cdc < 0.1*5280,                       cdc.dist.bin := 1]

drain <- merge(drain, cdc.near, all.x = TRUE)

# Convert distance to miles and assign quantiles by elevation
drain[, near_dist_cdc_mile := near_dist_cdc / 5280]
drain[near_dist_cdc == 3000, near_dist_cdc_mile := NA]

drain[elevation_ft < 36.5,                             dem.quant := 1]
drain[elevation_ft >= 36.5 & elevation_ft < 61.02,     dem.quant := 2]
drain[elevation_ft >= 61.02 & elevation_ft < 79.6,     dem.quant := 3]
drain[elevation_ft >= 79.6 & elevation_ft < 96,        dem.quant := 4]
drain[elevation_ft >= 96,                              dem.quant := 5]

# Estimate Models ----------------------------------------------------------------
m1 <- lm(max.elig.cs.100 ~ elevation_ft, data = drain)
m2 <- lm(max.elig.cs.100 ~ as.factor(dem.quant), data = drain)
m3 <- lm(max.elig.cs.100 ~ cdc.far, data = drain)
m4 <- lm(max.elig.cs.100 ~ near_dist_cdc_mile, data = drain)
m5 <- lm(max.elig.cs.100 ~ as.factor(cdc.dist.bin), data = drain)
m6 <- lm(max.elig.cs.100 ~ as.factor(cdc.dist.bin) + elevation_ft, data = drain)
m7 <- lm(max.elig.cs.100 ~ as.factor(cdc.dist.bin) + as.factor(dem.quant), data = drain)

# Preview Table A.6 -------------------------------------------------------------
stargazer(m1, m2, m3, m4, m5, m6, m7,
          type = "text", float = FALSE, omit.table.layout = "n",
          dep.var.caption = "", dep.var.labels.include = FALSE, model.names = FALSE,
          omit.stat = c("f", "ser", "adj.rsq", "rsq"), no.space = TRUE,
          covariate.labels = c("Elevation",
                               "Elevation:Q2", "Elevation:Q3", "Elevation:Q4", "Elevation:Q5",
                               "Drainage > 0.5 mile", "Drainage Distance",
                               "Drainage 0.1-0.2 mile", "Drainage 0.2-0.3 mile",
                               "Drainage 0.3-0.4 mile", "Drainage 0.4-0.5 mile",
                               "Drainage > 0.5 mile"))

# Save Table A.6 ---------------------------------------------------------------
stargazer(m1, m2, m3, m4, m5, m6, m7,
          type = "latex",
          out = "output/tables/appendix_a6.tex",
          float = FALSE, omit.table.layout = "n",
          dep.var.caption = "", dep.var.labels.include = FALSE, model.names = FALSE,
          omit.stat = c("f", "ser", "adj.rsq", "rsq"), no.space = TRUE,
          covariate.labels = c("Elevation",
                               "Elevation:Q2", "Elevation:Q3", "Elevation:Q4", "Elevation:Q5",
                               "Drainage > 0.5 mile", "Drainage Distance",
                               "Drainage 0.1-0.2 mile", "Drainage 0.2-0.3 mile",
                               "Drainage 0.3-0.4 mile", "Drainage 0.4-0.5 mile",
                               "Drainage > 0.5 mile"))
